Last compiled: 2020-12-29
Goal
In the previous section, we predicted whether or not a product will be put on ‘backorder’ status using an H2O model. We now take the H2O models we developed and inspect, visualize, and communicate their performance to business stakeholders.
These are some relevant questions to ponder:
cowplot package?
For this, I will be reusing the Product Backorders data set (source of raw data is linked below). You may download the data in case you want to try this code on your own.
Please note this is a continuation of the previous section.
Raw data source:
Download product_backorders.csv
cowplot package
As a first step, please load tidyverse and tidymodels libraries. For details on what these libraries offer, please refer to the comments in the code block below.
# STEP 1: Load Libraries ---
# Tidy, Transform, & Visualize
library(tidyverse)
# library(tibble) --> is a modern re-imagining of the data frame
# library(readr) --> provides a fast and friendly way to read rectangular data like csv
# library(dplyr) --> provides a grammar of data manipulation
# library(magrittr) --> offers a set of operators which make your code more readable (pipe operator)
# library(tidyr) --> provides a set of functions that help you get to tidy data
# library(stringr) --> provides a cohesive set of functions designed to make working with strings as easy as possible
# library(ggplot2) --> graphics
library(tidymodels)
# library(rsample) --> provides infrastructure for efficient data splitting, resampling and cross validation.
# library(parsnip) --> provides an API to many powerful modeling algorithms in R.
# library(recipes) --> tidy interface to data pre-processing (making statistical transformations) tools for feature engineering (prior to modeling).
# library(workflows) --> bundle your pre-processing, modeling, and post-processing together.
# library(tune) --> helps you optimize the hyperparameters of your model and pre-processing steps.
# library(yardstick) --> measures the effectiveness of models using performance metrics (metrics for model comparison).
# library(broom) --> converts the information in common statistical R objects into user-friendly, predictable formats.
# library(dials) --> creates and manages tuning parameters and parameter grids.
library(h2o) # H2O modeling
library(ggthemes) # Better themes for plotting and color palettes
library(glue) # Implementation of interpreted string literals
library(cowplot) # Provides various features to help create publication-quality figures
If you haven’t installed these packages, please install them by calling install.packages("name_of_package") in the R console. After installing, run the above code block again.
# Visualize the H2O leaderboard to help with model selection
# Reshape the leaderboard into long format so AUC and logloss can be faceted
# side by side, with models ordered by AUC (best first).
data_transformed_tbl <- automl_models_h2o@leaderboard %>%
  as_tibble() %>%
  # Keep only the metrics we plot below (auc and logloss)
  select(-c(aucpr, mean_per_class_error, rmse, mse)) %>%
  # The model type is the leading token of the model id (e.g. "DRF", "GBM")
  mutate(model_type = str_extract(model_id, "[^_]+")) %>%
  # Record the leaderboard position (1 = best model) for labeling
  # (the redundant slice(1:n()) no-op was removed)
  rownames_to_column(var = "rowname") %>%
  # Visually this step will not change anything:
  # it reorders the factor levels under the hood
  mutate(
    model_id   = as_factor(model_id) %>% reorder(auc),
    model_type = as.factor(model_type)
  ) %>%
  pivot_longer(cols = -c(model_id, model_type, rowname),
               names_to = "key",
               values_to = "value",
               # Preserve the original column order (auc before logloss)
               names_transform = list(key = forcats::fct_inorder)
  ) %>%
  # Prefix each model id with its leaderboard position; fct_rev puts the
  # best model at the top of the y axis
  mutate(model_id = paste0(rowname, ". ", model_id) %>% as_factor() %>% fct_rev())
# Perform visualization: one point per model per metric, faceted by metric
data_transformed_tbl %>%
  ggplot(aes(value, model_id, color = model_type)) +
  geom_point(size = 3) +
  # Label each point with its metric value; hjust = "inward" keeps labels on the panel
  geom_label(aes(label = round(value, 3), hjust = "inward"), show.legend = FALSE) +
  scale_color_gdocs() +
  # Facet to break out logloss and auc
  facet_wrap(~ toupper(key), scales = "free_x") +
  labs(title = "Leaderboard Metrics",
       subtitle = paste0("Ordered by: ", "AUC (Area Under the Curve)"),
       # Fixed typo in axis label: "Postion" -> "Position"
       y = "Model Position, Model ID", x = "") +
  theme(legend.position = "bottom")
# Extracts an H2O model name (model_id) by leaderboard position so it can be
# passed straight to h2o.getModel().
#
# h2o_leaderboard : an H2O leaderboard (anything coercible to a tibble with a
#                   model_id column)
# n               : leaderboard position to extract (1 = best model)
# verbose         : if TRUE, message() the extracted model name
# Returns the model_id as a character scalar.
extract_h2o_model_name_by_position <- function(h2o_leaderboard, n = 1, verbose = TRUE) {
  model_name <- h2o_leaderboard %>%
    as_tibble() %>%  # as.tibble() is deprecated since tibble 2.0.0
    slice(n) %>%
    pull(model_id)
  if (verbose) message(model_name)
  return(model_name)
}
# Save multiple models by extracting them from the leaderboard:
# the top three models and the models at positions 15-17.
model_positions <- c(1, 2, 3, 15, 16, 17)
for (position in model_positions) {
  model_name <- extract_h2o_model_name_by_position(automl_models_h2o@leaderboard, position)
  model <- h2o.getModel(model_name)
  h2o.saveModel(model, path = "../03_ml_automated/modeling/h2o_models/")
}
# Loading Distributed Random Forest model from disk by its saved model id
drf_h2o <- h2o.loadModel("../03_ml_automated/modeling/h2o_models/DRF_1_AutoML_20201229_021944")
# Take a look at the metrics on the training data set
# (printing the model shows training, validation, and cross-validation metrics)
drf_h2o
## Model Details:
## ==============
##
## H2OBinomialModel: drf
## Model ID: DRF_1_AutoML_20201229_021944
## Model Summary:
## number_of_trees number_of_internal_trees model_size_in_bytes min_depth max_depth mean_depth min_leaves
## 1 5 5 43213 20 20 20.00000 592
## max_leaves mean_leaves
## 1 740 677.80000
##
##
## H2OBinomialMetrics: drf
## ** Reported on training data. **
## ** Metrics reported on Out-Of-Bag training samples **
##
## MSE: 0.08551364
## RMSE: 0.2924271
## LogLoss: 1.498955
## Mean Per-Class Error: 0.2455189
## AUC: 0.8084692
## AUCPR: 0.4648975
## Gini: 0.6169383
## R^2: 0.1895136
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 7910 621 0.072793 =621/8531
## Yes 486 676 0.418244 =486/1162
## Totals 8396 1297 0.114206 =1107/9693
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.335125 0.549817 168
## 2 max f2 0.144972 0.608650 252
## 3 max f0point5 0.545455 0.556081 91
## 4 max accuracy 0.606019 0.898587 73
## 5 max precision 0.814286 0.637131 26
## 6 max recall 0.000000 1.000000 399
## 7 max specificity 1.000000 0.981479 0
## 8 max absolute_mcc 0.335125 0.485604 168
## 9 max min_per_class_accuracy 0.047589 0.761618 330
## 10 max mean_per_class_accuracy 0.144972 0.778398 252
## 11 max tns 1.000000 8373.000000 0
## 12 max fns 1.000000 922.000000 0
## 13 max fps 0.000000 8531.000000 399
## 14 max tps 0.000000 1162.000000 399
## 15 max tnr 1.000000 0.981479 0
## 16 max fnr 1.000000 0.793460 0
## 17 max fpr 0.000000 1.000000 399
## 18 max tpr 0.000000 1.000000 399
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: drf
## ** Reported on validation data. **
##
## MSE: 0.06780781
## RMSE: 0.2603993
## LogLoss: 0.3822429
## Mean Per-Class Error: 0.1815999
## AUC: 0.9037786
## AUCPR: 0.5983543
## Gini: 0.8075572
## R^2: 0.3694531
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 2869 231 0.074516 =231/3100
## Yes 125 308 0.288684 =125/433
## Totals 2994 539 0.100764 =356/3533
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.276302 0.633745 193
## 2 max f2 0.201544 0.704945 238
## 3 max f0point5 0.507993 0.627923 99
## 4 max accuracy 0.507993 0.908576 99
## 5 max precision 0.726497 0.771739 31
## 6 max recall 0.000001 1.000000 399
## 7 max specificity 1.000000 0.997742 0
## 8 max absolute_mcc 0.276302 0.580775 193
## 9 max min_per_class_accuracy 0.182160 0.839355 245
## 10 max mean_per_class_accuracy 0.136886 0.841335 265
## 11 max tns 1.000000 3093.000000 0
## 12 max fns 1.000000 418.000000 0
## 13 max fps 0.000001 3100.000000 399
## 14 max tps 0.000001 433.000000 399
## 15 max tnr 1.000000 0.997742 0
## 16 max fnr 1.000000 0.965358 0
## 17 max fpr 0.000001 1.000000 399
## 18 max tpr 0.000001 1.000000 399
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## H2OBinomialMetrics: drf
## ** Reported on cross-validation data. **
## ** 5-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
##
## MSE: 0.06630903
## RMSE: 0.2575054
## LogLoss: 0.2961603
## Mean Per-Class Error: 0.1980823
## AUC: 0.9006669
## AUCPR: 0.6176905
## Gini: 0.8013338
## R^2: 0.3666239
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 8787 692 0.073003 =692/9479
## Yes 413 865 0.323161 =413/1278
## Totals 9200 1557 0.102724 =1105/10757
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.283138 0.610229 204
## 2 max f2 0.189881 0.686009 251
## 3 max f0point5 0.426218 0.622722 146
## 4 max accuracy 0.433292 0.910477 143
## 5 max precision 0.987500 0.920000 1
## 6 max recall 0.000003 1.000000 399
## 7 max specificity 1.000000 0.999789 0
## 8 max absolute_mcc 0.283138 0.555299 204
## 9 max min_per_class_accuracy 0.168537 0.816901 265
## 10 max mean_per_class_accuracy 0.139044 0.832829 279
## 11 max tns 1.000000 9477.000000 0
## 12 max fns 1.000000 1257.000000 0
## 13 max fps 0.000003 9479.000000 399
## 14 max tps 0.000003 1278.000000 399
## 15 max tnr 1.000000 0.999789 0
## 16 max fnr 1.000000 0.983568 0
## 17 max fpr 0.000003 1.000000 399
## 18 max tpr 0.000003 1.000000 399
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
## Cross-Validation Metrics Summary:
## mean sd cv_1_valid cv_2_valid cv_3_valid cv_4_valid cv_5_valid
## accuracy 0.9008095 0.008657596 0.8894052 0.9042751 0.90516037 0.8944677 0.9107392
## auc 0.90056276 0.006183793 0.89803165 0.901327 0.89267915 0.90107244 0.9097036
## aucpr 0.6195214 0.013277665 0.6197426 0.6133279 0.605478 0.61792636 0.641132
## err 0.09919049 0.008657596 0.110594794 0.09572491 0.09483961 0.10553231 0.08926081
## err_count 213.4 18.649397 238.0 206.0 204.0 227.0 192.0
##
## ---
## mean sd cv_1_valid cv_2_valid cv_3_valid cv_4_valid cv_5_valid
## pr_auc 0.6195214 0.013277665 0.6197426 0.6133279 0.605478 0.61792636 0.641132
## precision 0.5740481 0.039661285 0.52678573 0.58389264 0.5962963 0.5406977 0.6225681
## r2 0.36663377 0.010435288 0.35770705 0.37113935 0.356223 0.36639416 0.38170534
## recall 0.67137253 0.043507267 0.69140625 0.6796875 0.62890625 0.7294118 0.627451
## rmse 0.25749722 0.0022794479 0.25945586 0.2567285 0.25980765 0.25731105 0.25418305
## specificity 0.931745 0.014887195 0.91613925 0.93459916 0.9424802 0.9166667 0.94883966
# We want to see how it performs for the testing data frame
# Make sure to convert it to an h2o object (H2OFrame) first via as.h2o()
h2o.performance(drf_h2o, newdata = as.h2o(test_tbl))
##
|
| | 0%
|
|====================================================================================================| 100%
## H2OBinomialMetrics: drf
##
## MSE: 0.06467491
## RMSE: 0.2543126
## LogLoss: 0.3412506
## Mean Per-Class Error: 0.1885317
## AUC: 0.8991278
## AUCPR: 0.603723
## Gini: 0.7982556
## R^2: 0.3717561
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 3933 275 0.065352 =275/4208
## Yes 173 382 0.311712 =173/555
## Totals 4106 657 0.094058 =448/4763
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.335190 0.630363 171
## 2 max f2 0.203729 0.703076 238
## 3 max f0point5 0.439373 0.631385 131
## 4 max accuracy 0.439373 0.913920 131
## 5 max precision 0.938889 0.904762 5
## 6 max recall 0.000002 1.000000 399
## 7 max specificity 1.000000 0.999525 0
## 8 max absolute_mcc 0.335190 0.579610 171
## 9 max min_per_class_accuracy 0.200000 0.830798 240
## 10 max mean_per_class_accuracy 0.147428 0.842385 264
## 11 max tns 1.000000 4206.000000 0
## 12 max fns 1.000000 543.000000 0
## 13 max fps 0.000002 4208.000000 399
## 14 max tps 0.000002 555.000000 399
## 15 max tnr 1.000000 0.999525 0
## 16 max fnr 1.000000 0.978378 0
## 17 max fpr 0.000002 1.000000 399
## 18 max tpr 0.000002 1.000000 399
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
# Grid search over the number of trees for a random forest, using the same
# training/validation frames and 5-fold cross validation as the AutoML run.
drf_grid_02 <- h2o.grid(
# See help page for available algorithms via ?h2o.grid()
algorithm = "randomForest",
# Use the same as the object
grid_id = "drf_grid_02",
# predictor and response variables
x = x,
y = y,
# training and validation frame and crossfold validation
training_frame = train_h2o,
validation_frame = valid_h2o,
nfolds = 5,
# Hyperparameters: Use drf_h2o@allparameters to see all
hyper_params = list(
# Use different number of trees to find a better model
ntrees = c(5, 10, 15, 20, 50, 60, 70, 120, 140, 160, 250)
)
)
# Print the grid results, ordered by increasing logloss (default sort)
drf_grid_02
## H2O Grid Details
## ================
##
## Grid ID: drf_grid_02
## Used hyper parameters:
## - ntrees
## Number of models: 11
## Number of failed models: 0
##
## Hyper-Parameter Search Summary: ordered by increasing logloss
## ntrees model_ids logloss
## 1 50 drf_grid_02_model_5 0.20194369527162748
## 2 160 drf_grid_02_model_10 0.20495648940884076
## 3 250 drf_grid_02_model_11 0.20561302478203666
## 4 140 drf_grid_02_model_9 0.20595486959240283
## 5 70 drf_grid_02_model_7 0.20610204607635804
## 6 120 drf_grid_02_model_8 0.20621354356748559
## 7 60 drf_grid_02_model_6 0.20873422999843805
## 8 20 drf_grid_02_model_4 0.22515542402254943
## 9 15 drf_grid_02_model_3 0.24866723889190218
## 10 10 drf_grid_02_model_2 0.2565587094120526
## 11 5 drf_grid_02_model_1 0.3072671008697041
# Re-sort the same grid results, ordered by decreasing auc this time
h2o.getGrid(grid_id = "drf_grid_02", sort_by = "auc", decreasing = TRUE)
## H2O Grid Details
## ================
##
## Grid ID: drf_grid_02
## Used hyper parameters:
## - ntrees
## Number of models: 11
## Number of failed models: 0
##
## Hyper-Parameter Search Summary: ordered by decreasing auc
## ntrees model_ids auc
## 1 50 drf_grid_02_model_5 0.9418355557734823
## 2 250 drf_grid_02_model_11 0.9410000047877848
## 3 160 drf_grid_02_model_10 0.940474462864208
## 4 120 drf_grid_02_model_8 0.9399775238270711
## 5 70 drf_grid_02_model_7 0.9387190380977241
## 6 140 drf_grid_02_model_9 0.9378187281959743
## 7 60 drf_grid_02_model_6 0.9349936462794538
## 8 20 drf_grid_02_model_4 0.9167968448828734
## 9 5 drf_grid_02_model_1 0.8997704917599748
## 10 15 drf_grid_02_model_3 0.8970943264585697
## 11 10 drf_grid_02_model_2 0.8953872335535878
# Retrieve the winning grid model (best logloss and best AUC) by its id
drf_grid_02_model_5 <- h2o.getModel("drf_grid_02_model_5")
# Compare AUC across training, validation, and cross-validation folds
drf_grid_02_model_5 %>% h2o.auc(train = T, valid = T, xval = T)
## train valid xval
## 0.9346191 0.9480377 0.9418356
# The model is not overfitting because there's a small difference between the
# training AUC and the validation / cross validation AUC
# Run it with test data and compare to the results from "drf_h2o" model above
drf_grid_02_model_5 %>%
h2o.performance(newdata = as.h2o(test_tbl))
##
|
| | 0%
|
|====================================================================================================| 100%
## H2OBinomialMetrics: drf
##
## MSE: 0.05749836
## RMSE: 0.2397882
## LogLoss: 0.1975977
## Mean Per-Class Error: 0.1533859
## AUC: 0.9396469
## AUCPR: 0.71996
## Gini: 0.8792938
## R^2: 0.4414682
##
## Confusion Matrix (vertical: actual; across: predicted) for F1-optimal threshold:
## No Yes Error Rate
## No 3971 237 0.056321 =237/4208
## Yes 139 416 0.250450 =139/555
## Totals 4110 653 0.078942 =376/4763
##
## Maximum Metrics: Maximum metrics at their respective thresholds
## metric threshold value idx
## 1 max f1 0.302477 0.688742 176
## 2 max f2 0.208246 0.745628 223
## 3 max f0point5 0.424131 0.719194 128
## 4 max accuracy 0.424131 0.929876 128
## 5 max precision 0.948267 1.000000 0
## 6 max recall 0.010264 1.000000 379
## 7 max specificity 0.948267 1.000000 0
## 8 max absolute_mcc 0.302477 0.646671 176
## 9 max min_per_class_accuracy 0.181079 0.864865 237
## 10 max mean_per_class_accuracy 0.144739 0.870642 257
## 11 max tns 0.948267 4208.000000 0
## 12 max fns 0.948267 554.000000 0
## 13 max fps 0.000223 4208.000000 399
## 14 max tps 0.010264 555.000000 379
## 15 max tnr 0.948267 1.000000 0
## 16 max fnr 0.948267 0.998198 0
## 17 max fpr 0.000223 1.000000 399
## 18 max tpr 0.010264 1.000000 379
##
## Gains/Lift Table: Extract with `h2o.gainsLift(<model>, <data>)` or `h2o.gainsLift(<model>, valid=<T/F>, xval=<T/F>)`
# Loading top H2O model (the AutoML leader: a stacked ensemble of all models)
stacked_ensemble_h2o <- h2o.loadModel("../03_ml_automated/modeling/h2o_models/StackedEnsemble_AllModels_AutoML_20201229_021944")
# Score the loaded stacked ensemble on the hold-out test set
performance_h2o <- h2o.performance(stacked_ensemble_h2o, newdata = as.h2o(test_tbl))
# H2O performance objects are S4 objects (slots are accessed with @)
typeof(performance_h2o)
## [1] "S4"
# Inspect the S4 slots available on the performance object
performance_h2o %>% slotNames()
## [1] "algorithm" "on_train" "on_valid" "on_xval" "metrics"
# Convert the threshold-level metrics (one row per candidate threshold)
# into a tibble for plotting
performance_tbl <- performance_h2o %>%
  h2o.metric() %>%
  as_tibble()  # as.tibble() is deprecated since tibble 2.0.0
# Inspect the columns: tpr/fpr, precision/recall, f1, etc. per threshold
performance_tbl %>%
  glimpse()
## Rows: 400
## Columns: 20
## $ threshold [3m[38;5;246m<dbl>[39m[23m 0.9889336, 0.9819733, 0.9785107, 0.9760445, 0.9714268, 0.9697713, 0.9680889…
## $ f1 [3m[38;5;246m<dbl>[39m[23m 0.01075269, 0.02135231, 0.02836879, 0.04569420, 0.06271777, 0.06944444, 0.0…
## $ f2 [3m[38;5;246m<dbl>[39m[23m 0.006747638, 0.013471037, 0.017945267, 0.029095792, 0.040196516, 0.04462293…
## $ f0point5 [3m[38;5;246m<dbl>[39m[23m 0.02645503, 0.05145798, 0.06768190, 0.10638298, 0.14263074, 0.15649452, 0.1…
## $ accuracy [3m[38;5;246m<dbl>[39m[23m 0.8841067, 0.8845266, 0.8849465, 0.8859962, 0.8870460, 0.8874659, 0.8880957…
## $ precision [3m[38;5;246m<dbl>[39m[23m 1.0000000, 0.8571429, 0.8888889, 0.9285714, 0.9473684, 0.9523810, 0.9583333…
## $ recall [3m[38;5;246m<dbl>[39m[23m 0.005405405, 0.010810811, 0.014414414, 0.023423423, 0.032432432, 0.03603603…
## $ specificity [3m[38;5;246m<dbl>[39m[23m 1.0000000, 0.9997624, 0.9997624, 0.9997624, 0.9997624, 0.9997624, 0.9997624…
## $ absolute_mcc [3m[38;5;246m<dbl>[39m[23m 0.06912713, 0.08855632, 0.10473959, 0.13741716, 0.16387806, 0.17336342, 0.1…
## $ min_per_class_accuracy [3m[38;5;246m<dbl>[39m[23m 0.005405405, 0.010810811, 0.014414414, 0.023423423, 0.032432432, 0.03603603…
## $ mean_per_class_accuracy [3m[38;5;246m<dbl>[39m[23m 0.5027027, 0.5052866, 0.5070884, 0.5115929, 0.5160974, 0.5178992, 0.5206019…
## $ tns [3m[38;5;246m<dbl>[39m[23m 4208, 4207, 4207, 4207, 4207, 4207, 4207, 4206, 4206, 4205, 4205, 4205, 420…
## $ fns [3m[38;5;246m<dbl>[39m[23m 552, 549, 547, 542, 537, 535, 532, 529, 523, 522, 516, 513, 511, 508, 507, …
## $ fps [3m[38;5;246m<dbl>[39m[23m 0, 1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 5, 6, 6, 9, 10, 11, 12, 12, 12, 13, …
## $ tps [3m[38;5;246m<dbl>[39m[23m 3, 6, 8, 13, 18, 20, 23, 26, 32, 33, 39, 42, 44, 47, 48, 53, 54, 56, 60, 64…
## $ tnr [3m[38;5;246m<dbl>[39m[23m 1.0000000, 0.9997624, 0.9997624, 0.9997624, 0.9997624, 0.9997624, 0.9997624…
## $ fnr [3m[38;5;246m<dbl>[39m[23m 0.9945946, 0.9891892, 0.9855856, 0.9765766, 0.9675676, 0.9639640, 0.9585586…
## $ fpr [3m[38;5;246m<dbl>[39m[23m 0.0000000000, 0.0002376426, 0.0002376426, 0.0002376426, 0.0002376426, 0.000…
## $ tpr [3m[38;5;246m<dbl>[39m[23m 0.005405405, 0.010810811, 0.014414414, 0.023423423, 0.032432432, 0.03603603…
## $ idx [3m[38;5;246m<int>[39m[23m 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 2…
# Custom ggplot theme reused by all subsequent performance plots:
# bottom legend, transparent panel with a thin black border, light grid lines
theme_new <- theme(
legend.position = "bottom",
legend.title = element_text(size = 11),
legend.text = element_text(size = 9),
legend.key = element_blank(),
panel.background = element_rect(fill = "transparent"),
panel.border = element_rect(color = "black", fill = NA, size = 0.5),
panel.grid.major = element_line(color = "grey", size = 0.333)
)
# Find the threshold row where F1 is maximized
# (the best harmonic balance of precision and recall)
performance_tbl %>%
filter(f1 == max(f1))
# Plot precision and recall as functions of the classification threshold
performance_tbl %>%
ggplot(aes(x = threshold)) +
geom_line(aes(y = precision, color = "Precision"), size = 0.5) +
geom_line(aes(y = recall, color = "Recall"), size = 0.5) +
scale_color_manual(breaks = c("Precision", "Recall"),
values = c("blue", "red")) +
# Insert line where precision and recall are harmonically optimized (max F1)
geom_vline(xintercept = h2o.find_threshold_by_max_metric(performance_h2o, "f1")) +
labs(
title = "Precision vs. Recall",
y = "Value",
x = "Threshold") +
theme_new
# Load an H2O model from disk, score it on test_tbl, and return a tibble of
# TPR / FPR pairs (per threshold) together with the model's AUC, for ROC plots.
load_model_performance_metrics <- function(path, test_tbl) {
  loaded_model <- h2o.loadModel(path)
  performance <- h2o.performance(loaded_model, newdata = as.h2o(test_tbl))
  metrics_tbl <- as_tibble(h2o.metric(performance))
  metrics_tbl %>%
    mutate(auc = h2o.auc(performance)) %>%
    select(tpr, fpr, auc)
}
# Build one long table of ROC metrics for every saved model in the directory:
# list the saved model files, score each on the test set, and unnest the results
model_metrics_tbl <- fs::dir_info(path = "../03_ml_automated/modeling/h2o_models/") %>%
select(path) %>%
mutate(metrics = map(path, load_model_performance_metrics, test_tbl)) %>%
unnest(cols = metrics)
# ROC curves for all saved models, colored by model file, line type by AUC
model_metrics_tbl %>%
arrange(desc(auc)) %>%
mutate(
# Extract the model names
# NOTE(review): [,5] assumes the path has exactly five "/"-separated parts
# ("../03_ml_automated/modeling/h2o_models/<file>") — confirm if the
# directory layout changes
PATH = str_split(path, pattern = "/", simplify = T)[,5] %>% as_factor(),
AUC = auc %>% round(3) %>% as.character() %>% as_factor()
) %>%
ggplot(aes(fpr, tpr, color = PATH, linetype = AUC)) +
geom_line(size = 0.75) +
scale_color_gdocs() +
# Diagonal reference line: the ROC curve of a random classifier
geom_abline(color = "black", linetype = "dotted", size = 0.75) +
theme_minimal() +
theme_new +
theme(legend.direction = "vertical") +
labs(title = "ROC (Receiver Operating Characteristic) Plot",
subtitle = "Performance of Top 3 & Bottom 3 Performing Models",
y = "TPR",
x = "FPR")
# Redefinition of the helper above: same loading and scoring, but the returned
# tibble additionally carries precision and recall (per threshold) so the same
# table can drive a precision-vs-recall plot.
load_model_performance_metrics <- function(path, test_tbl) {
  loaded_model <- h2o.loadModel(path)
  performance <- h2o.performance(loaded_model, newdata = as.h2o(test_tbl))
  metrics_tbl <- as_tibble(h2o.metric(performance))
  metrics_tbl %>%
    mutate(auc = h2o.auc(performance)) %>%
    select(tpr, fpr, auc, precision, recall)
}
# Rebuild the metrics table with the extended helper, now including
# precision and recall columns alongside tpr / fpr / auc
model_metrics_tbl <- fs::dir_info(path = "../03_ml_automated/modeling/h2o_models/") %>%
select(path) %>%
mutate(metrics = map(path, load_model_performance_metrics, test_tbl)) %>%
unnest(cols = metrics)
# Precision-vs-recall curves for all saved models, colored by model file
model_metrics_tbl %>%
arrange(desc(auc)) %>%
mutate(
# Extract the model names
# NOTE(review): [,5] assumes the path has exactly five "/"-separated parts
# ("../03_ml_automated/modeling/h2o_models/<file>") — confirm if the
# directory layout changes
PATH = str_split(path, pattern = "/", simplify = T)[,5] %>% as_factor(),
AUC = auc %>% round(3) %>% as.character() %>% as_factor()
) %>%
ggplot(aes(recall, precision, color = PATH, linetype = AUC)) +
geom_line(size = 0.75) +
scale_color_gdocs() +
theme_minimal() +
theme_new +
theme(legend.direction = "vertical") +
labs(title = "Precision vs Recall Plot",
subtitle = "Performance of Top 3 & Bottom 3 Performing Models",
y = "Precision",
x = "Recall")
# Table for Gain and Lift plotting: one row per decile group of predictions
gain_lift_tbl <- performance_h2o %>%
  h2o.gainsLift() %>%
  as_tibble()  # as.tibble() is deprecated since tibble 2.0.0
## Gain Plot
# Keep the cumulative capture rate (the "gain") and add a diagonal baseline
# (random targeting), then pivot to long format so ggplot can map the line
# color and group aesthetics to `key`.
gain_transformed_tbl <- gain_lift_tbl %>%
  select(group, cumulative_data_fraction, cumulative_capture_rate) %>%
  rename(gain = cumulative_capture_rate) %>%
  mutate(baseline = cumulative_data_fraction) %>%
  pivot_longer(cols = c(gain, baseline), names_to = "key", values_to = "value")
# Gain chart: fraction of positives captured vs. fraction of data contacted,
# with the diagonal baseline representing random targeting
gain_transformed_tbl %>%
ggplot(aes(x = cumulative_data_fraction, y = value, color = key)) +
geom_line(size = 0.5) +
scale_color_gdocs() +
theme_minimal() +
theme_new +
labs(title = "Gain Chart",
x = "Cumulative Data Fraction",
y = "Gain")
## Lift Plot
# Keep the cumulative lift and add a constant baseline of 1 (random targeting),
# then pivot to long format for the color/group aesthetics.
lift_transformed_tbl <- gain_lift_tbl %>%
  select(group, cumulative_data_fraction, cumulative_lift) %>%
  rename(lift = cumulative_lift) %>%
  mutate(baseline = 1) %>%
  pivot_longer(cols = c(lift, baseline), names_to = "key", values_to = "value")
# Lift chart: how many times better than random each cumulative data
# fraction performs (baseline of 1 = random targeting)
lift_transformed_tbl %>%
ggplot(aes(x = cumulative_data_fraction, y = value, color = key)) +
geom_line(size = 0.5) +
scale_color_gdocs() +
theme_minimal() +
theme_new +
labs(title = "Lift Chart",
x = "Cumulative Data Fraction",
y = "Lift")
# Plot ROC, Precision-vs-Recall, Gain, and Lift charts for the top and bottom
# models of an H2O leaderboard, evaluated on new data, combined with cowplot.
#
# h2o_leaderboard : an H2O AutoML leaderboard
# newdata         : data to evaluate on (coercible to tibble, then H2OFrame)
# order_by        : leaderboard metric used to order legends ("auc" or "logloss")
# top_models      : number of models taken from the top of the leaderboard
# bottom_models   : number of models taken from the bottom
# size            : line size used for all geoms
# Returns a combined cowplot grid object.
plot_h2o_performance <- function(h2o_leaderboard, newdata, order_by = c("auc", "logloss"),
                                 top_models = 2, bottom_models = 2, size = 1.5) {
  # Inputs: keep only the top and bottom slices of the leaderboard
  leaderboard_tbl <- h2o_leaderboard %>%
    as_tibble() %>%
    slice(1:top_models, (n() - bottom_models + 1):n())
  newdata_tbl <- newdata %>%
    as_tibble()
  # Selecting the first, if nothing is provided
  order_by <- tolower(order_by[[1]])
  # Convert string stored in a variable to column name (symbol)
  order_by_expr <- rlang::sym(order_by)
  # Turn off the progress bars (opposite of h2o.show_progress())
  h2o.no_progress()
  # 1. Model Metrics: threshold-level metrics for one model on the new data
  get_model_performance_metrics <- function(model_id, test_tbl) {
    model_h2o <- h2o.getModel(model_id)
    perf_h2o <- h2o.performance(model_h2o, newdata = as.h2o(test_tbl))
    perf_h2o %>%
      h2o.metric() %>%
      as_tibble() %>%  # as.tibble() is deprecated since tibble 2.0.0
      select(threshold, tpr, fpr, precision, recall)
  }
  model_metrics_tbl <- leaderboard_tbl %>%
    mutate(metrics = map(model_id, get_model_performance_metrics, newdata_tbl)) %>%
    unnest(cols = metrics) %>%
    mutate(model_id = as_factor(model_id) %>%
             # programmatically reorder factors depending on order_by
             # (descending only when ordering by AUC)
             fct_reorder(!! order_by_expr,
                         .desc = order_by == "auc"),
           auc = auc %>%
             round(3) %>%
             as.character() %>%
             as_factor() %>%
             fct_reorder(as.numeric(model_id)),
           logloss = logloss %>%
             round(4) %>%
             as.character() %>%
             as_factor() %>%
             fct_reorder(as.numeric(model_id)))
  ## 1A. ROC Plot
  p1 <- model_metrics_tbl %>%
    ggplot(aes(fpr, tpr, color = model_id, linetype = !! order_by_expr)) +
    geom_line(size = size) +
    scale_color_gdocs() +
    theme_minimal() +
    theme_new +
    labs(title = "ROC", x = "FPR", y = "TPR") +
    theme(legend.direction = "vertical")
  ## 1B. Precision vs Recall
  p2 <- model_metrics_tbl %>%
    ggplot(aes(recall, precision, color = model_id, linetype = !! order_by_expr)) +
    geom_line(size = size) +
    scale_color_gdocs() +
    theme_minimal() +
    theme_new +
    labs(title = "Precision Vs Recall", x = "Recall", y = "Precision") +
    theme(legend.position = "none")
  ## 2. Gain / Lift: decile-group table for one model on the new data
  get_gain_lift <- function(model_id, test_tbl) {
    model_h2o <- h2o.getModel(model_id)
    perf_h2o <- h2o.performance(model_h2o, newdata = as.h2o(test_tbl))
    perf_h2o %>%
      h2o.gainsLift() %>%
      as_tibble() %>%  # as.tibble() is deprecated since tibble 2.0.0
      select(group, cumulative_data_fraction, cumulative_capture_rate, cumulative_lift)
  }
  gain_lift_tbl <- leaderboard_tbl %>%
    mutate(metrics = map(model_id, get_gain_lift, newdata_tbl)) %>%
    unnest(cols = metrics) %>%
    mutate(model_id = as_factor(model_id) %>%
             fct_reorder(!! order_by_expr,
                         .desc = order_by == "auc"),
           auc = auc %>%
             round(3) %>%
             as.character() %>%
             as_factor() %>%
             fct_reorder(as.numeric(model_id)),
           logloss = logloss %>%
             round(4) %>%
             as.character() %>%
             as_factor() %>%
             fct_reorder(as.numeric(model_id))) %>%
    rename(gain = cumulative_capture_rate,
           lift = cumulative_lift)
  ## 2A. Gain Plot
  p3 <- gain_lift_tbl %>%
    ggplot(aes(cumulative_data_fraction, gain,
               color = model_id, linetype = !! order_by_expr)) +
    # Fixed: removed trailing comma in geom_line(size = size,), which passes
    # an empty (missing) argument and errors at plot build time
    geom_line(size = size) +
    # Diagonal reference: gain of a random model
    geom_segment(x = 0, y = 0, xend = 1, yend = 1,
                 color = "red", size = size, linetype = "dotted") +
    scale_color_gdocs() +
    theme_minimal() +
    theme_new +
    expand_limits(x = c(0, 1), y = c(0, 1)) +
    labs(title = "Gain", x = "Cumulative Data Fraction", y = "Gain") +
    theme(legend.position = "none")
  ## 2B. Lift Plot
  p4 <- gain_lift_tbl %>%
    ggplot(aes(cumulative_data_fraction, lift,
               color = model_id, linetype = !! order_by_expr)) +
    geom_line(size = size) +
    # Horizontal reference: lift of a random model (always 1)
    geom_segment(x = 0, y = 1, xend = 1, yend = 1,
                 color = "red", size = size, linetype = "dotted") +
    scale_color_gdocs() +
    theme_minimal() +
    theme_new +
    expand_limits(x = c(0, 1), y = c(0, 1)) +
    labs(title = "Lift", x = "Cumulative Data Fraction", y = "Lift") +
    theme(legend.position = "none")
  ### Combine using cowplot
  # cowplot::get_legend() extracts a legend from a ggplot object
  p_legend <- get_legend(p1)
  # Remove legend from p1 (the shared legend is drawn separately below)
  p1 <- p1 + theme(legend.position = "none")
  # cowplot::plot_grid() combines multiple ggplots into a single cowplot object
  p <- cowplot::plot_grid(p1, p2, p3, p4, ncol = 2)
  # cowplot::ggdraw() sets up a drawing layer
  p_title <- ggdraw() +
    # cowplot::draw_label() draws text on a ggdraw layer / ggplot object
    draw_label(glue("Metrics for Top {top_models} & Bottom {bottom_models} H2O Models"),
               size = 18, fontface = "bold", color = "#2C3E50")
  p_subtitle <- ggdraw() +
    draw_label(glue("Ordered by {toupper(order_by)}"),
               size = 10, color = "#2C3E50")
  # Combine everything
  ret <- plot_grid(p_title, p_subtitle, p, p_legend,
                   # Adjust the relative spacing, so that the legend always fits
                   ncol = 1, rel_heights = c(0.05, 0.05, 1, 0.05 * (top_models + bottom_models)))
  # Restore progress bars before returning
  h2o.show_progress()
  return(ret)
}
# Plot the top 5 and bottom 5 leaderboard models on the test set,
# with legends ordered by logloss
automl_models_h2o@leaderboard %>%
plot_h2o_performance(newdata = test_tbl, order_by = "logloss",
size = 0.75, bottom_models = 5, top_models = 5)